import pytesseract      # 用于OCR识别
import cv2
import numpy as np      # 用于处理图像数据

# TODO: Determine these paths from the host system or from user
# configuration instead of hard-coding them.
# Directory handed to Tesseract through the --tessdata-dir option in ocr_image().
TESSDATA_DIR = "/usr/share/tesseract-ocr/5/tessdata"
# Path of the Tesseract executable (despite the TESSDATA_ prefix, this is the
# binary itself; it is assigned to pytesseract's tesseract_cmd below).
TESSDATA_CMD = "/usr/bin/tesseract"


# Point pytesseract at the Tesseract executable (runs at import time).
pytesseract.pytesseract.tesseract_cmd = TESSDATA_CMD  # Tesseract executable path

def ocr_image(img, *, binarize: bool = False) -> str:
    """Recognize mixed Simplified-Chinese/English text in an image.

    The image is converted to a single-channel grayscale array and handed to
    Tesseract (``chi_sim+eng``, page segmentation mode 11).

    Args:
        img: Anything ``np.array`` can turn into an image array. A 2-D array
            is used as-is (already grayscale), a 4-channel array is treated
            as RGBA, and any other input is converted as RGB.
        binarize: When true, run OCR on an adaptive-threshold (black/white)
            version of the image instead of the grayscale one. Defaults to
            ``False``, which matches what this function has always passed to
            Tesseract; previously the thresholded image was computed on every
            call and then discarded.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    pixels = np.array(img)

    # Reduce to one channel. The unconditional RGB conversion used before
    # raised on grayscale (2-D) and RGBA (4-channel) input.
    if pixels.ndim == 2:
        img_gray = pixels
    elif pixels.ndim == 3 and pixels.shape[2] == 4:
        img_gray = cv2.cvtColor(pixels, cv2.COLOR_RGBA2GRAY)
    else:
        img_gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    ocr_input = img_gray
    if binarize:
        # Gaussian-weighted adaptive threshold: 11x11 neighbourhood, with a
        # constant of 2 subtracted from the weighted mean.
        ocr_input = cv2.adaptiveThreshold(
            img_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

    # Quote the tessdata directory so a path containing spaces is still
    # passed to Tesseract as a single argument.
    return pytesseract.image_to_string(
        ocr_input, lang="chi_sim+eng",
        config=f'--tessdata-dir "{TESSDATA_DIR}" --psm 11'
    )
